# Install gapminder only when it is not already available, so that re-running
# the script does not download and reinstall the package every time. The CRAN
# mirror is contacted over HTTPS rather than plain HTTP.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder", repos = "https://cran.us.r-project.org")
}
## Installing package into '/Users/Leona/Library/R/3.5/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/4h/vqb8hww915jc23y3mxgq2y8w0000gn/T//RtmpUIvVxF/downloaded_packages
library(ggplot2)
library(gapminder)
# Excerpt of the Gapminder data on life expectancy, GDP per capita, and
# population by country.
# Look at the data: printing the tibble shows its dimensions, the column
# types, and the first ten rows.
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
# Compact structure overview: object classes, number of observations, and the
# type plus first few values of every column.
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Per-column summary: level counts for the factor columns, and min / quartiles
# / mean / max for the numeric columns.
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Build the plot in stages. First store the data and the aesthetic mappings
# (GDP per capita on x, life expectancy on y) in an object.
p <- ggplot(gapminder, aes(gdpPercap, lifeExp))
# Printing it now draws only the axes, because no layer has been added yet.
p
# Adding a point layer turns it into a scatter plot.
p + geom_point()
# The same scatter plot written as a single expression.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()
# Log Transformation ----
# What is the difference? `log()` is the natural logarithm (ln, base e) while
# `log10()` is base 10. The point cloud has the same shape in both plots; only
# the units on the x-axis change.
ggplot(data = gapminder, aes(x = log(gdpPercap), y = lifeExp)) +
  geom_point()
ggplot(data = gapminder, aes(x = log10(gdpPercap), y = lifeExp)) +
  geom_point()

# Base plot with a natural-log x variable and no layers yet; layers are added
# to `p` further down.
p <- ggplot(data = gapminder, aes(x = log(gdpPercap), y = lifeExp))

# Alternative to transforming the data: map the raw variable and let the scale
# apply the log10 transform. Do not combine log10() inside aes() with
# scale_x_log10(), otherwise the x values are log-transformed twice.
p1 <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
# Color ----
# Color the points by continent. `color` goes inside aes() because it is
# mapped from a column of the data.
p + geom_point(aes(color = continent))

# Same idea on a log10-scaled x-axis. The raw gdpPercap is mapped and
# scale_x_log10() does the transformation; wrapping the variable in log10() as
# well would apply the logarithm twice.
p2 <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent))
p2
# Reduce overplotting ----
# Open the help page to see which aesthetics geom_point() understands.
?geom_point

# Pitfall (kept for comparison): constants placed inside aes() are treated as
# data mappings, so ggplot2 sends them through the alpha and size scales and
# adds legend entries for them instead of using them as literal settings.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent, alpha = 0.3, size = 3))

# Correct: fixed values are set outside aes(); only `color` is mapped to data.
# Semi-transparent points make dense regions visible.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point(aes(color = continent), alpha = 0.3, size = 3)

# Two ways to obtain a log10 x-axis, drawn without a geom so that only the
# axes can be compared. Use one or the other, never both at once.
# 1. Transform the data: the axis is labelled in log10 units.
ggplot(data = gapminder, aes(x = log10(gdpPercap), y = lifeExp))
# 2. Transform the scale: the axis keeps the original GDP units.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10()